/******************************************************************************* * Copyright (c) 2000, 2011 IBM Corporation and others. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * IBM Corporation - initial API and implementation *******************************************************************************/ package org.eclipse.swt.internal; import org.eclipse.swt.internal.motif.*; /** * This class implements the conversions between unicode characters * and the <em>platform supported</em> representation for characters. * <p> * Note that, unicode characters which can not be found in the platform * encoding will be converted to an arbitrary platform specific character. * </p> */ public final class Converter { static final byte [] NULL_BYTE_ARRAY = new byte [1]; static final byte [] EMPTY_BYTE_ARRAY = new byte [0]; static final char [] EMPTY_CHAR_ARRAY = new char [0]; static String CodePage; static final byte[] UCS2; static final byte[] UTF8; static final Object LOCK = new Object (); /* Converter cache */ static boolean LastMbcsToUCS2Failed, LastUCS2ToMbcsFailed; static String LastMbcsToUCS2CodePage; static String LastUCS2ToMbcsCodePage; static int LastUCS2ToMbcs = -1; static int LastUTF8ToMbcs = -1; static int LastMbcsToUCS2 = -1; static int LastMbcsToUTF8 = -1; static int UTF8ToUCS2 = -1; static int UCS2ToUTF8 = -1; /* Buffers cache */ static int BufferSize; static int MbcsBuffer, Ucs2Buffer, Utf8Buffer; static { if (OS.IsHPUX) { UCS2 = getAsciiBytes("ucs2"); UTF8 = getAsciiBytes("utf8"); } else { UCS2 = getAsciiBytes("UCS-2"); UTF8 = getAsciiBytes("UTF-8"); } int length, item = OS.nl_langinfo (OS.CODESET); if (item != 0 && (length = OS.strlen (item)) > 0) { byte [] buffer = new byte [length]; OS.memmove (buffer, item, length); CodePage = new String (buffer); if (OS.IsSunOS) { if (length > 3 && CodePage.indexOf ("ISO") == 0) { CodePage = CodePage.substring (3, length); } } } else { if (OS.IsLinux) CodePage = "ISO-8859-1"; else if (OS.IsAIX) CodePage = "ISO8859-1"; else if (OS.IsSunOS) CodePage = "8859-1"; else if (OS.IsHPUX) CodePage = "iso88591"; else CodePage = "iso8859_1"; } /* * The buffers can hold up to 512 unicode characters when converting * from UCS-2 to any MBCS (including UTF-8). And they can hold * at least 512 MBCS characters when converting from any MBCS to * UCS-2. */ BufferSize = 512; Ucs2Buffer = OS.XtMalloc (BufferSize * 2); Utf8Buffer = OS.XtMalloc (BufferSize * 6); MbcsBuffer = OS.XtMalloc (BufferSize * 6); } /** * Returns the default code page for the platform where the * application is currently running. * * @return the default code page */ public static String defaultCodePage () { return CodePage; } static byte[] getAsciiBytes (String str) { int length = str.length (); byte [] buffer = new byte [length + 1]; for (int i=0; i<length; i++) { buffer [i] = (byte)str.charAt (i); } return buffer; } static String getAsciiString (byte [] buffer) { int length = buffer.length; char [] chars = new char [length]; for (int i=0; i<length; i++) { chars [i] = (char)buffer [i]; } return new String (chars); } /** * Converts an array of bytes representing the platform's encoding, * in the given code page, of some character data into an array of * matching unicode characters. * * @param codePage the code page to use for conversion * @param buffer the array of bytes to be converted * @return the unicode conversion */ public static char [] mbcsToWcs (String codePage, byte [] buffer) { /* Check for the simple cases */ if (buffer == null) { return EMPTY_CHAR_ARRAY; } int length = buffer.length; if (length == 0) { return EMPTY_CHAR_ARRAY; } /* * Optimize for English ASCII encoding. If no conversion is * performed, it is safe to return any object that will also not * be converted if this routine is called again with the result. * This ensures that double conversion will not be performed * on the same bytes. Note that this relies on the fact that * lead bytes are never in the range 0..0x7F. */ char [] wideCharStr = new char [length]; for (int i=0; i<length; i++) { if ((buffer [i] & 0xFF) <= 0x7F) { wideCharStr [i] = (char) buffer [i]; // all bytes <= 0x7F, so no ((char) (buffer[i]&0xFF)) needed } else { synchronized (LOCK) { /* * Feature in Solaris. Some Solaris machines do not provide an iconv * decoder/encoder that converts directly from/to any MBCS encoding to/from * USC-2. The fix is to convert to UTF-8 enconding first and them * convert to UCS-2. */ String cp = codePage != null ? codePage : CodePage; if (cp != LastMbcsToUCS2CodePage && !cp.equals (LastMbcsToUCS2CodePage)) { if (LastMbcsToUCS2 != -1) OS.iconv_close (LastMbcsToUCS2); if (LastMbcsToUTF8 != -1) OS.iconv_close (LastMbcsToUTF8); LastMbcsToUCS2 = LastMbcsToUTF8 = -1; LastMbcsToUCS2CodePage = cp; LastMbcsToUCS2Failed = false; } int cd = LastMbcsToUCS2; if (cd == -1 && !LastMbcsToUCS2Failed) { cd = LastMbcsToUCS2 = OS.iconv_open (UCS2, getAsciiBytes (cp)); } if (cd == -1) { LastMbcsToUCS2Failed = true; cd = UTF8ToUCS2; if (cd == -1) cd = UTF8ToUCS2 = OS.iconv_open (UCS2, UTF8); if (cd == -1) return EMPTY_CHAR_ARRAY; cd = LastMbcsToUTF8; if (cd == -1) cd = LastMbcsToUTF8 = OS.iconv_open (UTF8, getAsciiBytes (cp)); } if (cd == -1) return EMPTY_CHAR_ARRAY; boolean utf8 = cd == LastMbcsToUTF8; int inByteCount = length; int outByteCount = utf8 ? length * 6 : length * 2; int ptr1 = 0, ptr2 = 0, ptr3 = 0; if (length <= BufferSize) { ptr1 = MbcsBuffer; ptr2 = Utf8Buffer; ptr3 = Ucs2Buffer; } else { ptr1 = OS.XtMalloc (inByteCount); if (utf8) ptr2 = OS.XtMalloc (length * 6); ptr3 = OS.XtMalloc (length * 2); } int ptr = utf8 ? ptr2 : ptr3; int [] inBuffer = {ptr1}; int [] inBytesLeft = {inByteCount}; int [] outBuffer = {ptr}; int [] outBytesLeft = {outByteCount}; OS.memmove (ptr1, buffer, inByteCount); OS.iconv (cd, inBuffer, inBytesLeft, outBuffer, outBytesLeft); outByteCount = outBuffer [0] - ptr; if (utf8) { cd = UTF8ToUCS2; inByteCount = outByteCount; outByteCount = length * 2; inBuffer[0] = ptr2; inBytesLeft[0] = inByteCount; outBuffer[0] = ptr3; outBytesLeft [0]= outByteCount; OS.iconv (cd, inBuffer, inBytesLeft, outBuffer, outBytesLeft); outByteCount = outBuffer [0] - ptr3; } wideCharStr = new char [outByteCount / 2]; OS.memmove (wideCharStr, ptr3, outByteCount); if (ptr1 != 0 && ptr1 != MbcsBuffer) OS.XtFree (ptr1); if (ptr2 != 0 && ptr2 != Utf8Buffer) OS.XtFree (ptr2); if (ptr3 != 0 && ptr3 != Ucs2Buffer) OS.XtFree (ptr3); } return wideCharStr; } } return wideCharStr; } /** * Free any cached resources. */ public static void release () { synchronized (LOCK) { if (Ucs2Buffer != 0) OS.XtFree (Ucs2Buffer); if (Utf8Buffer != 0) OS.XtFree (Utf8Buffer); if (MbcsBuffer != 0) OS.XtFree (MbcsBuffer); if (LastUCS2ToMbcs != -1) OS.iconv_close (LastUCS2ToMbcs); if (LastUTF8ToMbcs != -1) OS.iconv_close (LastUTF8ToMbcs); if (LastMbcsToUCS2 != -1) OS.iconv_close (LastMbcsToUCS2); if (LastMbcsToUTF8 != -1) OS.iconv_close (LastMbcsToUTF8); if (UTF8ToUCS2 != -1) OS.iconv_close (UTF8ToUCS2); if (UCS2ToUTF8 != -1) OS.iconv_close (UCS2ToUTF8); LastUCS2ToMbcs = LastUTF8ToMbcs = LastMbcsToUCS2 = LastMbcsToUTF8 = UTF8ToUCS2 = UCS2ToUTF8 -1; Ucs2Buffer = Utf8Buffer = MbcsBuffer = 0; } } /** * Converts an array of chars (containing unicode data) to an array * of bytes representing the platform's encoding, of those characters * in the given code page. * * @param codePage the code page to use for conversion * @param buffer the array of chars to be converted * @return the platform encoding */ public static byte [] wcsToMbcs (String codePage, char [] buffer) { return wcsToMbcs (codePage, buffer, false); } /** * Converts an array of chars (containing unicode data) to an array * of bytes representing the platform's encoding, of those characters * in the given code page. If the termination flag is true, the resulting * byte data will be null (zero) terminated. * * @param codePage the code page to use for conversion * @param buffer the array of chars to be converted * @param terminate <code>true</code> if the result should be null terminated and false otherwise. * @return the platform encoding */ public static byte [] wcsToMbcs (String codePage, char [] buffer, boolean terminate) { /* Check for the simple cases */ if (buffer == null) { return (terminate) ? NULL_BYTE_ARRAY : EMPTY_BYTE_ARRAY; } int length = buffer.length; if (length == 0) { return (terminate) ? NULL_BYTE_ARRAY : EMPTY_BYTE_ARRAY; } /* * Optimize for English ASCII encoding. This optimization * relies on the fact that lead bytes can never be in the * range 0..0x7F. */ byte [] mbcs = new byte [(terminate) ? length + 1 : length]; for (int i=0; i<length; i++) { if ((buffer [i] & 0xFFFF) <= 0x7F) { mbcs [i] = (byte) buffer [i]; } else { synchronized (LOCK) { /* * Feature in Solaris. Some Solaris machines do not provide an iconv * decoder/encoder that converts directly from/to any MBCS encoding to/from * USC-2. The fix is to convert to UTF-8 enconding first and them * convert to UCS-2. */ String cp = codePage != null ? codePage : CodePage; if (cp != LastUCS2ToMbcsCodePage && !cp.equals (LastUCS2ToMbcsCodePage)) { if (LastUCS2ToMbcs != -1) OS.iconv_close (LastUCS2ToMbcs); if (LastUTF8ToMbcs != -1) OS.iconv_close (LastUTF8ToMbcs); LastUCS2ToMbcs = LastUTF8ToMbcs = -1; LastUCS2ToMbcsCodePage = cp; } int cd = LastUCS2ToMbcs; if (cd == -1 && !LastUCS2ToMbcsFailed) { cd = LastUCS2ToMbcs = OS.iconv_open (getAsciiBytes (cp), UCS2); } if (cd == -1) { LastUCS2ToMbcsFailed = true; cd = LastUTF8ToMbcs; if (cd == -1) cd = LastUTF8ToMbcs = OS.iconv_open (getAsciiBytes (cp), UTF8); if (cd == -1) return (terminate) ? NULL_BYTE_ARRAY : EMPTY_BYTE_ARRAY; cd = UCS2ToUTF8; if (cd == -1) cd = UCS2ToUTF8 = OS.iconv_open (UTF8, UCS2); } if (cd == -1) return (terminate) ? NULL_BYTE_ARRAY : EMPTY_BYTE_ARRAY; boolean utf8 = cd == UCS2ToUTF8; int inByteCount = length * 2; int outByteCount = length * 6; int ptr1 = 0, ptr2 = 0, ptr3 = 0; if (length <= BufferSize) { ptr1 = Ucs2Buffer; ptr2 = Utf8Buffer; ptr3 = MbcsBuffer; } else { ptr1 = OS.XtMalloc (inByteCount); if (utf8) ptr2 = OS.XtMalloc (outByteCount); ptr3 = OS.XtMalloc (outByteCount); } int ptr = utf8 ? ptr2 : ptr3; int [] inBuffer = {ptr1}; int [] inBytesLeft = {inByteCount}; int [] outBuffer = {ptr}; int [] outBytesLeft = {outByteCount}; OS.memmove (ptr1, buffer, inByteCount); while (inBytesLeft [0] > 0) { OS.iconv (cd, inBuffer, inBytesLeft, outBuffer, outBytesLeft); if (inBytesLeft [0] != 0) { inBuffer [0] += 2; inBytesLeft [0] -= 2; } } outByteCount = outBuffer [0] - ptr; if (utf8) { cd = LastUTF8ToMbcs; inByteCount = outByteCount; outByteCount = length * 6; inBuffer[0] = ptr2; inBytesLeft[0] = inByteCount; outBuffer[0] = ptr3; outBytesLeft [0]= outByteCount; OS.iconv (cd, inBuffer, inBytesLeft, outBuffer, outBytesLeft); outByteCount = outBuffer [0] - ptr3; } mbcs = new byte [terminate ? outByteCount + 1 : outByteCount]; OS.memmove (mbcs, ptr3, outByteCount); if (ptr1 != 0 && ptr1 != Ucs2Buffer) OS.XtFree (ptr1); if (ptr2 != 0 && ptr2 != Utf8Buffer) OS.XtFree (ptr2); if (ptr3 != 0 && ptr3 != MbcsBuffer) OS.XtFree (ptr3); } return mbcs; } } return mbcs; } /** * Converts a String (containing unicode data) to an array * of bytes representing the platform's encoding, of those characters * in the given code page. * * @param codePage the code page to use for conversion * @param string the string to be converted * @return the platform encoding */ public static byte [] wcsToMbcs (String codePage, String string) { return wcsToMbcs (codePage, string, false); } /** * Converts a String (containing unicode data) to an array * of bytes representing the platform's encoding, of those characters * in the given code page. If the termination flag is true, the resulting * byte data will be null (zero) terminated. * * @param codePage the code page to use for conversion * @param string the string to be converted * @param terminate <code>true</code> if the result should be null terminated and false otherwise. * @return the platform encoding */ public static byte [] wcsToMbcs (String codePage, String string, boolean terminate) { if (terminate) { if (string == null) return NULL_BYTE_ARRAY; int count = string.length (); char [] buffer = new char [count + 1]; string.getChars (0, count, buffer, 0); return wcsToMbcs (codePage, buffer, false); } else { if (string == null) return EMPTY_BYTE_ARRAY; int count = string.length (); char [] buffer = new char [count]; string.getChars (0, count, buffer, 0); return wcsToMbcs (codePage, buffer, false); } } }